---
title: "The Prodigal Child Returns? Attitudes Towards Return Migration in a Developing Economy"
author: "Melle Scholten"
date: "Last Updated 30 March 2025, after conditional acceptance"
output:
  html_document: default
  pdf_document: default
---

This is the replication data and code for the observational component of the paper "The Prodigal Child Returns? Attitudes Towards Return Migration in a Developing Economy." Replication data and code for the experimental component of the paper are provided separately. 

Written in R version 4.4.1 (2024-06-14 ucrt) -- "Race for Your Life"

Runtime, approximately 75 seconds.

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

The following packages are necessary to replicate this code: `tidyverse`, `dplyr`, `readxl`, `patchwork`, `janitor`, `marginaleffects`, `ggplot2`, `stargazer`, `vtable`, and `easystats`. If one or more of these packages are not installed on your machine, please install them:

```{r installpack}
# install.packages("tidyverse")
# install.packages("dplyr")
# install.packages("readxl")
# install.packages("patchwork")
# install.packages("janitor")
# install.packages("marginaleffects")
# install.packages("ggplot2")
# install.packages("stargazer")
# install.packages("easystats")
```

Load Libraries:
```{r libraries, message=FALSE}
library(tidyverse)
library(dplyr)
library(readxl)
library(patchwork)
library(janitor)
library(marginaleffects)
library(ggplot2)
library(stargazer)
library(vtable)
library(easystats)
```

Import data:
```{r import}
ref_pure <- read.csv("table_2023-05-18-15-55-58.csv")
```

Clean data:
First ```select``` for used variables, and ```filter``` out expatriate voters and ```filter``` in only yes/no votes. 
```{r clean, message=FALSE}
ref_clean <- ref_pure %>% 
  select(dep = Departamento,
         code_dep = Cod...1,
         mun = Municipio,
         code_mun = Cod...3,
         vote_type = Tipo.de.votación,
         vote_count = Total,
         mesa=Mesa) %>% 
  filter(!(dep %in% c("CONSULADOS"))) %>% 
  filter(vote_type %in% c("SI", "NO")) 
```

Because the unit of observation is not the voting booth, but rather Municipalities, we want a separate data frame where votes are aggregated at that level. 

```{r vote, message=FALSE}
# Municipal Level
ref_mun <- ref_clean %>% 
  group_by(dep, mun, code_mun, vote_type) %>% 
  summarize(vote = sum(vote_count)) %>% 
  pivot_wider(id_cols = c("dep", "mun", "code_mun"),
              names_from = "vote_type",
              values_from = "vote",
              values_fill = 0) %>% 
  mutate(total = NO + SI) %>% 
  mutate(perc_yes = SI / total)
ref_mun$mun <- make_clean_names(ref_mun$mun)
ref_mun$mun = gsub("_", " ", ref_mun$mun)
ref_mun$mun <- toupper(ref_mun$mun)
```

Next, let's add the migration / displacement and violence data to the data frame.These come from CEDE Municipal Panel of the Economics Faculty of the Universidad de los Andes.

```{r migration, message=FALSE}
mig <- read_xlsx("PANEL_CONFLICTO_Y_VIOLENCIA(2022).xlsx")
mig[is.na(mig)] <- 0
mig_viol_clean <- mig %>% 
  filter(!(ano>2015)) %>% # Keep only data from prior to the referendum
  group_by(codmpio) %>% 
  summarise(
    refugees_out = sum(desplazados_expulsion), 
    refugees_in = sum(desplazados_recepcion), 
    farc_attacks = sum(tpobc_FARC), 
    farc_homicides = sum(homic_vict_FARC), 
    subversive = sum(acc_subversivas), 
    la_violencia = mean(Violencia_48_a_53),
    asssault = sum(asalt_pob),
    raids = sum(incur_pob),
    terror = sum(terrorismot),
    kidnappings = sum(secuestros),
    coca_bin = last(coca),
    coca_hec = last(H_coca)
  ) %>% 
  mutate(refugees_net = refugees_out - refugees_in)
goodgov <- read_xlsx("PANEL_BUEN_GOBIERNO(2022).xlsx")
goodgov_clean <- goodgov %>% 
  filter(ano==2016) %>% 
  select(
    codmpio = codmpio,
    iga = IGA_total
  )
```

Data on support for President Santos are taken from Replication data of Hazlett & Parente (2023), available on the Journal of Politics dataverse: https://dataverse.harvard.edu/file.xhtml?fileId=6479511&version=1.0 

```{r santos, message=FALSE}
santos <- read.csv("mapdata.csv")
santos_clean <- santos %>% 
  select(codmpio = id, 
         santos = santos14)
```

Now we need to merge all of this information into a single dataset with the municipality as the unit of observation. We also create a new variable that normalizes the number of refugees by population of the municipality. This dataframe also comes from the CEDE Municipal Panel.

```{r codes, message=FALSE}
code_mun <- read_xlsx("PANEL_CARACTERISTICAS_GENERALES(2022).xlsx") %>% 
  filter(ano==2016) %>% 
  select(
    codmpio = codmpio, 
    municipio = municipio,
    population = retro_pobl_tot
  ) 
code_mun$municipio <- make_clean_names(code_mun$municipio)
code_mun$municipio = gsub("_", " ", code_mun$municipio)
code_mun$mun <- toupper(code_mun$municipio)

code_mun <- mutate(
  code_mun, mun = fct_recode(mun, 
                             "ALBAN SAN JOSE" = "ALBAN 2", 
                             "ALTO BAUDO PIE DE PATO" = "ALTO BAUDO", 
                             "ANTIOQUIA" = "SANTAFE DE ANTIOQUIA", 
                             "AQUITANIA PUEBLOVIEJO" = "AQUITANIA", 
                             "ARBOLEDA BERRUECOS" = "ARBOLEDA", 
                             "ARIGUANI EL DIFICIL" = "ARIGUANI", 
                             "ARMERO GUAYABAL" = "ARMERO", 
                             "ARROYO HONDO" = "ARROYOHONDO", 
                             "ATRATO YUTO" = "ATRATO", 
                             "BAHIA SOLANO MUTIS" = "BAHIA SOLANO", 
                             "BAJO BAUDO PIZARRO" = "BAJO BAUDO", 
                             "BARRANCO MINAS" = "BARRANCO MINAS CD", 
                             "BOLIVAR 2" = "BOLIVAR", 
                             "BOLIVAR 3" = "BOLIVAR 2", 
                             "BOLIVAR 4" = "BOLIVAR 3", 
                             "BOLIVAR" = "CIUDAD BOLIVAR", 
                             "BOJAYA BELLAVISTA" = "BOJAYA", 
                             "BUENOS AIRES PACOA" = "PACOA CD", 
                             "BUGA" = "GUADALAJARA DE BUGA", 
                             "CACAHUAL" = "CACAHUAL CD", 
                             "CALIMA DARIEN" = "CALIMA", 
                             "CALOTO" = "CALOTO 1 3",
                             "CARMEN DE VIBORAL" = "EL CARMEN DE VIBORAL", 
                             "CERRO DE SAN ANTONIO" = "CERRO SAN ANTONIO", 
                             "CHIMA 2" = "CHIMA 1 3", 
                             "COLON GENOVA" = "COLON 2", 
                             "COLOSO RICAURTE" = "COLOSO", 
                             "COTORRA BONGO" = "COTORRA", 
                             "CUASPUD CARLOSAMA" = "CUASPUD", 
                             "EL AGUILA" = "EL A GUILA", 
                             "EL CANTON DEL SAN PABLO MAN" = "EL CANTON DEL SAN PABLO", 
                             "EL CARMEN 2" = "EL CARMEN", 
                             "EL CARMEN" = "EL CARMEN DE ATRATO", 
                             "EL CARMEN 3" = "EL CARMEN DE CHUCURI", 
                             "EL ENCANTO" = "EL ENCANTO CD", 
                             "EL TABLON" = "EL TABLON DE GOMEZ", 
                             "FRANCISCO PIZARRO SALAHONDA" = "FRANCISCO PIZARRO", 
                             "GALERAS NUEVA GRANADA" = "GALERAS", 
                             "GUACHENE" = "GUACHENE 1", 
                             "LA APARTADA FRONTERA" = "LA APARTADA",
                             "LA ARGENTINA PLATA VIEJA" = "LA ARGENTINA", 
                             "LA CHORRERA" = "LA CHORRERA CD", 
                             "LA GUADALUPE" = "LA GUADALUPE CD", 
                             "LA PEDRERA" = "LA PEDRERA CD", 
                             "LA VICTORIA 3" = "LA VICTORIA CD", 
                             "LOPEZ MICAY" = "LOPEZ", 
                             "LOS ANDES SOTOMAYOR" = "LOS ANDES", 
                             "MAGUI PAYAN" = "MAGUI", 
                             "MALLAMA PIEDRANCHA" = "MALLAMA", 
                             "MANAURE BALCON DEL CESAR MANA" = "MANAURE 2", 
                             "MAPIRIPANA" = "MAPIRIPANA CD", 
                             "MEDIO ATRATO BETE" = "MEDIO ATRATO", 
                             "MEDIO BAUDO PUERTO MELUK" = "MEDIO BAUDO", 
                             "MIRITI PARANA" = "MIRITI PARANA CD", 
                             "MONTELIBANO" = "MONTELIBANO 1 3", 
                             "MORICHAL MORICHAL NUEVO" = "MORICHAL CD", 
                             "MORICHAL PAPUNAGUA" = "PAPUNAUA CD", 
                             "NOROSI" = "NOROSI 1", 
                             "PAEZ BELALCAZAR" = "PAEZ 2", 
                             "PANA PANA CAMPO ALEGRE" = "PANA PANA CD", 
                             "PARATEBUENO LA NAGUAYA" = "PARATEBUENO", 
                             "PATIA EL BORDO" = "PATIA", 
                             "PAZ DE ARIPORO MORENO" = "PAZ DE ARIPORO", 
                             "PUERTO ALEGRIA" = "PUERTO ALEGRIA CD", 
                             "PUERTO ARICA" = "PUERTO ARICA CD", 
                             "PUERTO COLOMBIA 2" = "PUERTO COLOMBIA CD", 
                             "PUERTO NARE LA MAGDALENA" = "PUERTO NARE", 
                             "PUERTO SANTANDER 2" = "PUERTO SANTANDER", 
                             "PUERTO SANTANDER" = "PUERTO SANTANDER CD", 
                             "PURACE COCONUCO" = "PURACE", 
                             "RIO QUITO PAIMADO" = "RIO QUITO", 
                             "RIOVIEJO" = "RIO VIEJO 1 3", 
                             "ROBERTO PAYAN SAN JOSE" = "ROBERTO PAYAN", 
                             "SAN ANDRES 3" = "SAN ANDRES", 
                             "SAN ANDRES" = "SAN ANDRES DE CUERQUIA", 
                             "SAN ANDRES DE SOTAVENTO" = "SAN ANDRES SOTAVENTO 1 3", 
                             "SAN FELIPE" = "SAN FELIPE CD", 
                             "SAN JOSE DE URE" = "SAN JOSE DE URE 1", 
                             "SAN JUAN DE BETULIA BETULIA" = "SAN JUAN DE BETULIA", 
                             "SAN JUAN DE RIOSECO" = "SAN JUAN DE RIO SECO", 
                             "SAN MARTIN DE LOS LLANOS" = "SAN MARTIN 2", 
                             "SAN MIGUEL LA DORADA" = "SAN MIGUEL 2", 
                             "SANTA BARBARA ISCUANDE" = "SANTA BARBARA 2", 
                             "SANTA BARBARA 2" = "SANTA BARBARA 3", 
                             "SANTACRUZ GUACHAVES" = "SANTACRUZ", 
                             "SANTUARIO 2" = "SANTUARIO", 
                             "SANTUARIO" = "EL SANTUARIO", 
                             "SINCE" = "SAN LUIS DE SINCE", 
                             "SOTARA PAISPAMBA" = "SOTARA", 
                             "TARAPACA" = "TARAPACA CD", 
                             "TESALIA CARNICERIAS" = "TESALIA", 
                             "TIQUISIO PTO RICO" = "TIQUISIO", 
                             "TOLU" = "SANTIAGO DE TOLU", 
                             "TOLUVIEJO" = "TOLU VIEJO", 
                             "TUCHIN" = "TUCHIN 1 5", 
                             "TUMACO" = "SAN ANDRES DE TUMACO", 
                             "UBATE" = "VILLA DE SAN DIEGO DE UBATE", 
                             "UNION PANAMERICANA LAS ANIMAS" = "UNION PANAMERICANA", 
                             "VALLE DEL GUAMUEZ LA HORMIGA" = "VALLE DEL GUAMUEZ", 
                             "VILLA DE LEIVA" = "VILLA DE LEYVA", 
                             "VISTA HERMOSA" = "VISTAHERMOSA", 
                             "YAVARATE" = "YAVARATE CD", 
                             "YONDO CASABE" = "YONDO", 
                             "ZONA BANANERA SEVILLA" = "ZONA BANANERA"), mun = as.character(mun))
```

```{r merge_check, message=FALSE}
# ID Names Check, PASS
intersect(names(code_mun), names(ref_mun))
intersect(names(code_mun), names(mig_viol_clean))

# Unique Values Check, all PASS
#1 code_mun data frame
unique_code <- unique(select(code_mun, codmpio))
nrow(unique_code)
nrow(code_mun)
unique_code <- unique(select(code_mun, mun))
nrow(unique_code)
nrow(code_mun)
#2 ref_mun data frame
unique_ref <- unique(select(ref_mun, mun))
nrow(unique_ref)
nrow(ref_mun)
#3 mig_viol_clean data frame
unique_mig <- unique(select(mig_viol_clean, codmpio))
nrow(unique_mig)
nrow(mig_viol_clean)
#4 santos_clean data frame
unique_santos <- unique(select(santos_clean, codmpio))
nrow(unique_santos)
nrow(santos_clean)

# ID Values Check
#1 Match code_mun and ref_mun on municipality name, PASS
check11 <- anti_join(code_mun, ref_mun, by="mun")
table(check11$mun)
check12 <- anti_join(ref_mun, code_mun, by="mun")
table(check12$mun)

#2 Match code_mun and mig_viol_clean on DANE code, three observations: 27086, 99572, 99760 in mig, not in code_mun
# This is expected as there are three more obs in the mig dataframe than in the code_mun
check21 <- anti_join(mig_viol_clean, code_mun, by="codmpio")
table(check21$codmpio)
check22 <- anti_join(code_mun, mig_viol_clean, by="codmpio")
table(check22$codmpio)

#3 Match code_mun and santos_clean on DANE code
check31 <- anti_join(code_mun, santos_clean, by="codmpio")
table(check31$codmpio)
check32 <- anti_join(santos_clean, code_mun, by="codmpio")
table(check32$codmpio)
```

```{r merge_data, message=FALSE}
data_temp <- inner_join(code_mun, ref_mun)

data_temp <- inner_join(data_temp, santos_clean)

data_temp <- inner_join(data_temp, goodgov_clean)

data_final <- inner_join(data_temp, mig_viol_clean) %>% 
  mutate(refugees_out_norm = refugees_out / population) %>% 
  mutate(refugees_net_norm = refugees_net / population) %>% 
  mutate(refugees_in_norm = refugees_in / population) %>% 
  mutate(unemp = case_when( # Add unemployment data from DANE pdf
    codmpio == 27001 ~ 16.6, #Quibdo
    codmpio == 54001 ~ 15.1, #Cucuta AM
    codmpio == 63001 ~ 13.9, #Armenia
    codmpio == 20001 ~ 13, #Valledupar
    codmpio == 44001 ~ 12.9, #Riohacha
    codmpio == 73001 ~ 12.6, #Ibague
    codmpio == 19001 ~ 12.1, #Popayan
    codmpio == 18001 ~ 11.5, #Florencia
    codmpio == 41001 ~ 11, #Neiva
    codmpio == 70001 ~ 10.6, #Sincelejo
    codmpio == 76001 ~ 10.6, #Cali AM
    codmpio == 15001 ~ 10.5, #Tunja
    codmpio == 50001 ~ 10.5, #Villavicencio
    codmpio == 66001 ~ 10.3, #Pereira AM
    codmpio == 17001 ~ 10.3, #Manizales AM
    codmpio == 5001 ~ 10.3, #Medellin AM
    codmpio == 13001 ~ 9.9, #Cartagena
    codmpio == 47001 ~ 9.3, #Santa Marta
    codmpio == 11001 ~ 9.3, #Bogota DC
    codmpio == 52001 ~ 9.1, #Pasto
    codmpio == 23001 ~ 8.9, #Monteria
    codmpio == 8001 ~ 8.6, #Baranquilla AM
    codmpio == 68001 ~ 7.9, #Bucaramanga AM
    TRUE ~ NA
  ))
```

Descriptive statistics for the appendix (Table 8):
```{r descr_stats}
st(data_final)
```

Histogram of support for peace (Figure 4)
```{r histograms}
histogram <- ggplot(ref_mun, aes(perc_yes)) +
  geom_histogram(color = "black", fill = "grey", binwidth = 0.02) +
  xlab(" ") +
  ylab(" ") +
  theme_minimal()
ggsave("fig4.eps", plot=histogram, width=6, height=6, units="in")
```

Lastly, let's run the models and create some nice looking graphs and tables (Table 2 and Figure 5).
```{r model, message=FALSE}
reg1 <- lm(perc_yes ~ refugees_net_norm + farc_attacks + farc_homicides + subversive + santos + 
             population + coca_bin + coca_hec + iga, 
           data = data_final)
summary(reg1)

reg2 <- lm(perc_yes ~ refugees_net_norm*santos + farc_attacks + farc_homicides + subversive + 
             population + coca_bin + coca_hec + iga, 
           data = data_final)
summary(reg2)

reg3 <- lm(perc_yes ~ refugees_net_norm*unemp + farc_attacks + farc_homicides + subversive + santos + 
             population + coca_bin + coca_hec + iga, 
           data = data_final)
summary(reg3)

plot1 <- plot_slopes(model=reg2, variables="refugees_net_norm", condition="santos", rug=TRUE) +
  xlab("Support for Santos") +
  ylab("Effect of Refugee Flows on Peace Referendum") +
  geom_hline(yintercept = 0, col="black", linetype=5) +
  theme_bw()

plot2 <- plot_slopes(model=reg3, variables="refugees_net_norm", condition="unemp", rug=TRUE) +
  xlab("Unemployment in %") +
  ylab("Effect of Refugee Flows on Peace Referendum") +
  geom_hline(yintercept = 0, col="black", linetype=5) +
  theme_bw()

plot5 <- plot1 + plot2
ggsave("fig5.jpg", plot=plot5, width=6.5, height=4, units="in")

stargazer(reg1, reg2, reg3, type="latex", 
          style="apsr",
          covariate.labels = c("Net norm. refugees",
                               "Unemployment",
                               "FARC attacks",
                               "FARC homicides",
                               "Subversive actions",
                               "Support for Santos",
                               "Population",
                               "Cocaine (ext.)",
                               "Cocaine (int.)", 
                               "Good government index", 
                               "Refugees \times Santos",
                               "Refugees \times unemployment",
                               "Constant"),
          dep.var.labels = "Support for peace", 
          star.cutoffs = c(0.05, 0.01, 0.001)
          )

```

Addendum: perform diagnostics plots, checks, and robustness extensions:

```{r diagnostic_plots1}
check_model(reg1, check="qq", detrend=FALSE)
check_model(reg1, check="normality")
check_model(reg1, check="linearity")
check_model(reg1, check="homogeneity")
check_model(reg1, check="outliers")
check_model(reg1, check="vif")
check_predictions(reg1)
```


```{r diagnostic_plots2}
check_model(reg2, check="qq", detrend=FALSE)
check_model(reg2, check="normality")
check_model(reg2, check="linearity")
check_model(reg2, check="homogeneity")
check_model(reg2, check="outliers")
check_model(reg2, check="vif")
check_predictions(reg2)
```


```{r diagnostic_plots3}
check_model(reg3, check="qq", detrend=FALSE)
check_model(reg3, check="normality")
check_model(reg3, check="linearity")
check_model(reg3, check="homogeneity")
check_model(reg3, check="outliers")
check_model(reg3, check="vif")
check_predictions(reg3)
```

Check for potential collider bias from the Santos variable (Table 9)
```{r collider}
reg1_nocoll <- lm(perc_yes ~ refugees_net_norm + farc_attacks + farc_homicides + subversive + 
                    population + coca_bin + coca_hec + iga,
                  data = data_final)
summary(reg1_nocoll)

reg3_nocoll <- lm(perc_yes ~ refugees_net_norm*unemp + farc_attacks + farc_homicides + subversive + 
                    population + coca_bin + coca_hec + iga,
                  data = data_final)
summary(reg3_nocoll)

stargazer(reg1_nocoll, reg3_nocoll, type="latex", 
          style="apsr",
          covariate.labels = c("Net norm. refugees",
                               "Unemployment",
                               "FARC attacks",
                               "FARC homicides",
                               "Subversive actions",
                               "Population",
                               "Cocaine (ext.)",
                               "Cocaine (int.)",
                               "Good governance index",
                               "Refugees \times unemployment"),
          dep.var.labels = "Support for peace", 
          star.cutoffs = c(0.05, 0.01, 0.001)
          )
```

Robustness: alternative specifications for violence:
Results are no longer statistically significant in model 3, but all effects are in the hypothesized direction. Refugee flows and violence are likely highly colinear and with only 11 degrees of freedom these effects are not parsed out. 
```{r violence}
reg_viol1 <- lm(perc_yes ~ refugees_net_norm + farc_attacks + farc_homicides + subversive + santos +
                  la_violencia + asssault + raids + terror + kidnappings + 
                  population + coca_bin + coca_hec + iga,
             data = data_final)
summary(reg_viol1)

reg_viol2 <- lm(perc_yes ~ refugees_net_norm*santos + farc_attacks + farc_homicides + subversive +
                  la_violencia + asssault + raids + terror + kidnappings +
                  population + coca_bin + coca_hec + iga, 
             data = data_final)
summary(reg_viol2)

reg_viol3 <- lm(perc_yes ~ refugees_net_norm*unemp + farc_attacks + farc_homicides + subversive + santos +
                  la_violencia + asssault + raids + terror + kidnappings + 
                  population + coca_bin + coca_hec + iga, 
             data = data_final)
summary(reg_viol3)
```

Robustness: decomposing net flows into inward and outward
Results make sense except for model 3, where the marginal effect of potential returnees on support for peace increases with higher levels of unemployment. This is likely due to the observations in this group being urban areas with few outward refugees and many inward ones. The results are in line with the theory when taking the interaction of inward refugees and unemployment. At higher levels of unemployment, inward refugees are associated not with less support for peace, but more, as voters want IDPs to return home to alleviate labor market looseness. 
```{r decomp}
reg_decom1 <- lm(perc_yes ~ refugees_out_norm + refugees_in_norm + 
                   farc_attacks + farc_homicides + subversive + santos + 
                   population + coca_bin + coca_hec + iga,
                 data = data_final)
summary(reg_decom1)

reg_decom2 <- lm(perc_yes ~ refugees_out_norm*santos + refugees_in_norm + 
                   farc_attacks + farc_homicides + subversive + 
                   population + coca_bin + coca_hec + iga,
                 data = data_final)
summary(reg_decom2)

reg_decom3 <- lm(perc_yes ~ refugees_out_norm*unemp + refugees_in_norm +
                   farc_attacks + farc_homicides + subversive + santos + 
                   population + coca_bin + coca_hec + iga,
                 data = data_final)
summary(reg_decom3)

reg_decom4 <- lm(perc_yes ~ refugees_out_norm + refugees_in_norm*unemp +
                   farc_attacks + farc_homicides + subversive + santos + 
                   population + coca_bin + coca_hec + iga,
                 data = data_final)
summary(reg_decom4)
```

